{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "d71f182e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# !pip install numpy_financial\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import numpy_financial as npf\n",
    "import random\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "da7bb515",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import data\n",
    "# Cash-flow table read from the interim data folder; the path is relative to the\n",
    "# notebook's working directory. Later cells read its columns loanid,\n",
    "# treatmenttype_encode, accountpercentlocked_group, loandayselapsed and cf.\n",
    "cf_loan_level = pd.read_csv('../data/interim/tab_cf_loan_level.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "aa34de7d",
   "metadata": {},
   "source": [
    "**IRR by treatment group**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "3a7fbd76",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/s2/5vmy8rps1bb8jf1nz8jdhlpm0000gn/T/ipykernel_48878/4024431262.py:6: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
      "  agg_cf = cf_loan_level_gbtreatday.agg('sum').reset_index()[['treatmenttype_encode','loandayselapsed','cf']]\n"
     ]
    }
   ],
   "source": [
    "#######################\n",
    "# Aggregate cash flow #\n",
    "#######################\n",
    "\n",
    "# Total cash flow per treatment group and elapsed loan day.\n",
    "# Only `cf` is summed: a blanket .agg('sum') attempts to sum every other column as\n",
    "# well, which raises the numeric_only FutureWarning and stops being safe once\n",
    "# numeric_only defaults to False.\n",
    "cf_loan_level_gbtreatday = cf_loan_level.groupby(['treatmenttype_encode','loandayselapsed'])\n",
    "agg_cf = cf_loan_level_gbtreatday['cf'].sum().reset_index()\n",
    "# npf.irr reads row order as period order, so keep each group's days ascending.\n",
    "agg_cf = agg_cf.sort_values(by=['treatmenttype_encode','loandayselapsed'])\n",
    "\n",
    "#################\n",
    "# Calculate IRR #\n",
    "#################\n",
    "\n",
    "# npf.irr returns the rate per row of the series; *30 rescales it to a monthly rate\n",
    "# (simple scaling, no compounding).\n",
    "# NOTE(review): assumes every elapsed day is present for each group - a missing day\n",
    "# would shift later cash flows one period earlier. TODO confirm against the data.\n",
    "irr_T1_L = npf.irr(agg_cf.loc[agg_cf['treatmenttype_encode']=='T1_L','cf'])*30\n",
    "irr_T2_U = npf.irr(agg_cf.loc[agg_cf['treatmenttype_encode']=='T2_U','cf'])*30\n",
    "irr_Old = npf.irr(agg_cf.loc[agg_cf['treatmenttype_encode']=='Old','cf'])*30\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f2c98369",
   "metadata": {},
   "source": [
    "**IRR by treatment group & risk group**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "21bbe182",
   "metadata": {},
   "outputs": [],
   "source": [
    "#######################\n",
    "# Aggregate cash flow #\n",
    "#######################\n",
    "\n",
    "# Total cash flow per treatment group, risk group and elapsed loan day.\n",
    "# Only `cf` is summed: a blanket .agg('sum') attempts to sum every other column as\n",
    "# well, which raises the numeric_only FutureWarning and stops being safe once\n",
    "# numeric_only defaults to False.\n",
    "cf_loan_level_gbtreatday = cf_loan_level.groupby(['treatmenttype_encode','accountpercentlocked_group','loandayselapsed'])\n",
    "agg_cf = cf_loan_level_gbtreatday['cf'].sum().reset_index()\n",
    "# npf.irr reads row order as period order, so keep each cell's days ascending.\n",
    "agg_cf = agg_cf.sort_values(by=['treatmenttype_encode','accountpercentlocked_group','loandayselapsed'])\n",
    "\n",
    "#################\n",
    "# Calculate IRR #\n",
    "#################\n",
    "\n",
    "def _monthly_irr_by_risk(agg, treatment, risk_group):\n",
    "    # Per-row IRR of one treatment x risk-group cash-flow series, scaled by 30 to a monthly rate.\n",
    "    in_cell = (agg['treatmenttype_encode']==treatment)&(agg['accountpercentlocked_group']==risk_group)\n",
    "    return npf.irr(agg.loc[in_cell,'cf'])*30\n",
    "\n",
    "irr_T1_L_0_1_3 = _monthly_irr_by_risk(agg_cf,'T1_L','0_1_3')\n",
    "irr_T1_L_1_3_2_3 = _monthly_irr_by_risk(agg_cf,'T1_L','1_3_2_3')\n",
    "irr_T1_L_2_3_1 = _monthly_irr_by_risk(agg_cf,'T1_L','2_3_1')\n",
    "\n",
    "irr_T2_U_0_1_3 = _monthly_irr_by_risk(agg_cf,'T2_U','0_1_3')\n",
    "irr_T2_U_1_3_2_3 = _monthly_irr_by_risk(agg_cf,'T2_U','1_3_2_3')\n",
    "irr_T2_U_2_3_1 = _monthly_irr_by_risk(agg_cf,'T2_U','2_3_1')\n",
    "\n",
    "irr_Old_0_1_3 = _monthly_irr_by_risk(agg_cf,'Old','0_1_3')\n",
    "irr_Old_1_3_2_3 = _monthly_irr_by_risk(agg_cf,'Old','1_3_2_3')\n",
    "irr_Old_2_3_1 = _monthly_irr_by_risk(agg_cf,'Old','2_3_1')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "988ac15a",
   "metadata": {},
   "source": [
    "**CI of IRR by treatment group**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "a21a0f90",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<timed exec>:49: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
      "<timed exec>:49: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
      "<timed exec>:49: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
      "<timed exec>:49: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
      "<timed exec>:49: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 2min 1s, sys: 26.2 s, total: 2min 27s\n",
      "Wall time: 20.1 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "# Bootstrap settings.\n",
    "N_BOOTSTRAP = 1000       # number of bootstrap replications\n",
    "BOOTSTRAP_SEED = 12345   # fixed seed so the bootstrap draws can be reproduced\n",
    "N_LOAN_DAYS = 724        # only elapsed days 0..723 enter the resampled cash flows\n",
    "BS_TREATMENTS = ['T1_L','T2_U','Old']\n",
    "BS_RISK_GROUPS = ['0_1_3','1_3_2_3','2_3_1']\n",
    "\n",
    "# Private generator: reproducible draws without touching the global `random` state.\n",
    "bs_rng = random.Random(BOOTSTRAP_SEED)\n",
    "\n",
    "# Loop-invariant inputs, computed once instead of on every replication.\n",
    "# Rows inside the day window; filtering here replaces merging on an exploded\n",
    "# (loanid, day 0..723) grid in every replication.\n",
    "cf_in_window = cf_loan_level[cf_loan_level['loandayselapsed'].isin(range(N_LOAN_DAYS))]\n",
    "# Distinct loan ids per treatment in order of first appearance, so that a fixed\n",
    "# seed always maps to the same resampled loans.\n",
    "bs_loanids = {\n",
    "    treat: cf_loan_level.loc[cf_loan_level['treatmenttype_encode']==treat,'loanid'].unique().tolist()\n",
    "    for treat in BS_TREATMENTS\n",
    "}\n",
    "\n",
    "def _bs_monthly_irr(agg, **where):\n",
    "    # Monthly IRR (per-row IRR * 30) of the `cf` rows of `agg` that match every\n",
    "    # column==value pair given in `where`.\n",
    "    keep = pd.Series(True,index=agg.index)\n",
    "    for col,val in where.items():\n",
    "        keep &= agg[col]==val\n",
    "    return npf.irr(agg.loc[keep,'cf'])*30\n",
    "\n",
    "# One row per replication; columns follow BS_TREATMENTS (x BS_RISK_GROUPS) order.\n",
    "BS_irr_bytreat = []\n",
    "BS_irr_bytreatrisk = []\n",
    "\n",
    "for i in range(N_BOOTSTRAP):\n",
    "\n",
    "    if i%10==0:\n",
    "        print(i)\n",
    "\n",
    "    ################################\n",
    "    # Get sample via bootstrapping #\n",
    "    ################################\n",
    "\n",
    "    # Resample loans with replacement within each treatment group. A loan drawn k times\n",
    "    # appears k times in `drawn`, so the inner merge repeats its cash flows k times.\n",
    "    BS_parts = []\n",
    "    for treat in BS_TREATMENTS:\n",
    "        drawn = pd.DataFrame({'loanid':bs_rng.choices(bs_loanids[treat],k=len(bs_loanids[treat]))})\n",
    "        BS_parts.append(cf_in_window.merge(drawn,on='loanid',how='inner'))\n",
    "    BS_cf_loan_level = pd.concat(BS_parts)\n",
    "\n",
    "    ####################################\n",
    "    # Calculate IRR by treatment group #\n",
    "    ####################################\n",
    "\n",
    "    # Sum only `cf` (a blanket .agg('sum') triggers the numeric_only FutureWarning).\n",
    "    bs_agg_treat = (\n",
    "        BS_cf_loan_level\n",
    "        .groupby(['treatmenttype_encode','loandayselapsed'])['cf'].sum()\n",
    "        .reset_index()\n",
    "        .sort_values(by=['treatmenttype_encode','loandayselapsed'])\n",
    "    )\n",
    "    BS_irr_bytreat.append([\n",
    "        _bs_monthly_irr(bs_agg_treat,treatmenttype_encode=treat)\n",
    "        for treat in BS_TREATMENTS\n",
    "    ])\n",
    "\n",
    "    ###########################################\n",
    "    # Calculate IRR by treatment group & risk #\n",
    "    ###########################################\n",
    "\n",
    "    bs_agg_risk = (\n",
    "        BS_cf_loan_level\n",
    "        .groupby(['treatmenttype_encode','accountpercentlocked_group','loandayselapsed'])['cf'].sum()\n",
    "        .reset_index()\n",
    "        .sort_values(by=['treatmenttype_encode','accountpercentlocked_group','loandayselapsed'])\n",
    "    )\n",
    "    # Treatment-major, risk-minor order: T1_L x 3 risk groups, then T2_U, then Old.\n",
    "    BS_irr_bytreatrisk.append([\n",
    "        _bs_monthly_irr(bs_agg_risk,treatmenttype_encode=treat,accountpercentlocked_group=risk)\n",
    "        for treat in BS_TREATMENTS\n",
    "        for risk in BS_RISK_GROUPS\n",
    "    ])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "45a8dae6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Label the bootstrap draws: one row per replication, one column per portfolio.\n",
    "bytreat_cols = ['irr_'+treat for treat in ('T1_L','T2_U','Old')]\n",
    "# Treatment-major, risk-minor order, matching how each replication was stored.\n",
    "bytreatrisk_cols = [\n",
    "    'irr_'+treat+'_'+risk\n",
    "    for treat in ('T1_L','T2_U','Old')\n",
    "    for risk in ('0_1_3','1_3_2_3','2_3_1')\n",
    "]\n",
    "\n",
    "BS_irr_bytreat = pd.DataFrame(BS_irr_bytreat,columns=bytreat_cols)\n",
    "BS_irr_bytreatrisk = pd.DataFrame(BS_irr_bytreatrisk,columns=bytreatrisk_cols)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "55c1bcac",
   "metadata": {},
   "source": [
    "**Export a table**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "7c4cc9fb",
   "metadata": {},
   "outputs": [],
   "source": [
    "table = '../tables/r_treatment_irr_withCI.tex'\n",
    "\n",
    "# NOTE(review): the point estimates, sample sizes and bracketed ranges written below are\n",
    "# hard-coded strings; only the bootstrap standard errors and the p-value are computed here.\n",
    "\n",
    "def _se_pct(draws):\n",
    "    # Bootstrap standard error in percent with one decimal: population std (ddof=0, NaNs\n",
    "    # skipped) of the draws. Series.std is used because np.std on a one-column DataFrame\n",
    "    # relies on DataFrame.std's axis=None reduction, which newer pandas deprecates.\n",
    "    return format(draws.std(ddof=0)*100,'.1f')\n",
    "\n",
    "def _se_cells(risk_cols, treat_col):\n",
    "    # Standard errors of the three risk-group columns followed by the pooled column,\n",
    "    # joined with the ')&(' separators the table row expects.\n",
    "    cells = [_se_pct(BS_irr_bytreatrisk[col]) for col in risk_cols]\n",
    "    cells.append(_se_pct(BS_irr_bytreat[treat_col]))\n",
    "    return ')&('.join(cells)\n",
    "\n",
    "# Share of bootstrap draws in which the T1_L (secured) IRR is below the T2_U (unsecured) IRR.\n",
    "p_value = np.mean(BS_irr_bytreat['irr_T1_L']<BS_irr_bytreat['irr_T2_U'])\n",
    "\n",
    "# Mode 'w' truncates any existing file, so no prior removal is needed.\n",
    "with open(table, 'w') as f:\n",
    "\n",
    "    # Table preamble and header rows.\n",
    "    f.write('\\\\begin{table}[htbp]\\\\centering\\n')\n",
    "    f.write('\\\\small\\n')\n",
    "    f.write('\\\\def\\\\sym#1{\\\\ifmmode^{#1}\\\\else\\\\(^{#1}\\\\)\\\\fi}\\n')\n",
    "    f.write('\\\\caption{Monthly IRRs of Loan Portfolios \\\\label{tab:r_treatment_irr}}\\n')\n",
    "    f.write('\\\\begin{tabular}{l*{5}{c}}\\n')\n",
    "    f.write('\\\\toprule\\n')\n",
    "    f.write('Treatment Group  & \\\\multicolumn{3}{c}{\\\\begin{tabular}{@{}c@{}}\\\\underline{Account percent locked} \\\\\\\\ \\\\end{tabular}} & \\\\begin{tabular}{@{}c@{}}All \\\\\\\\ \\\\end{tabular} & $n$ \\\\\\\\\\n') \n",
    "    f.write('& 1st tercile & 2nd tercile & 3rd tercile & & \\\\\\\\\\\\addlinespace\\n')\n",
    "    f.write('& (1) & (2) & (3) & (4) \\\\\\\\\\n')\n",
    "    f.write('\\\\midrule\\n')\n",
    "\n",
    "    # Secured (T1_L): estimates, bootstrap standard errors, ranges.\n",
    "    f.write('Secured & 0.2\\\\% & -2.5\\\\% & -8.4\\\\% & -3.7\\\\%  & 217 \\\\\\\\\\n')\n",
    "    f.write('&('+_se_cells(['irr_T1_L_0_1_3','irr_T1_L_1_3_2_3','irr_T1_L_2_3_1'],'irr_T1_L')+')\\\\\\\\ \\n')\n",
    "    f.write('& [0.00, 0.06] & [0.06, 0.19] & [0.19, 0.57] & [0.00, 0.57]  & \\\\\\\\ \\\\addlinespace \\n')\n",
    "\n",
    "    # Unsecured (T2_U): estimates, bootstrap standard errors, ranges, then the p-value row.\n",
    "    f.write('Unsecured & -3.7  & -6.3 & -10.2 & -6.9 & 438 \\\\\\\\\\n')\n",
    "    f.write('&('+_se_cells(['irr_T2_U_0_1_3','irr_T2_U_1_3_2_3','irr_T2_U_2_3_1'],'irr_T2_U')+')\\\\\\\\ \\n')\n",
    "    f.write('& [0.00, 0.05] & [0.05, 0.19] & [0.19, 0.64] & [0.00, 0.64]  & \\\\\\\\ \\\\addlinespace \\n')\n",
    "    f.write('p-value & & & &'+\n",
    "        format(p_value,'.3f')+\n",
    "        '& \\\\\\\\ \\\\addlinespace \\n')\n",
    "    f.write('\\\\midrule\\n')\n",
    "\n",
    "    # Prior school-fee loans (Old): estimates, bootstrap standard errors, ranges.\n",
    "    f.write('Prior School-Fee & 6.6  & 6.0 & 3.2 & 5.1  & 1377 \\\\\\\\\\n')\n",
    "    f.write('Loans (Secured) &('+_se_cells(['irr_Old_0_1_3','irr_Old_1_3_2_3','irr_Old_2_3_1'],'irr_Old')+')\\\\\\\\ \\n')\n",
    "    f.write('& [0.00, 0.04] & [0.04, 0.13] & [0.13, 0.30] & [0.00, 0.30]  & \\\\\\\\ \\n')\n",
    "\n",
    "    f.write('\\\\addlinespace\\\\bottomrule\\n')\n",
    "    f.write('\\\\end{tabular}\\n')\n",
    "    f.write('\\\\end{table}\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4d1b14ee",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
